tweets %>%
select(tweet, date) %>%
# slice(., sample(1:n())) #random selection
arrange(date)
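# how often is 'hyperlocal' used, overall and per subset?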
df_comp %>%
filter(LEMMA == 'hyperlocal') %>%
select(LEMMA, USES_TOT, SUBSET, USES)
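# total usage of the case lemmas in the full data, most used first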
df_comp %>%
filter(
LEMMA %in% cases,
SUBSET == 'full'
) %>%
arrange(desc(USES_TOT))
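# usage in the first time subset, least used lemmas first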
df_comp %>%
filter(
SUBSETTING == 'time',
SUBSET == 'one'
) %>%
select(LEMMA, USES_TOT, USES) %>%
arrange(USES)
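# case lemmas grouped by whether degree centralization increases, stays constant, or decreases over time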
cent_diac_incr <- c('robo-signing', 'circular economy', 'alt-right', 'Brexiteer')
cent_diac_const <- c('upskill')
cent_diac_decr <- c('blockchain', 'newsjacking', 'overtourism')
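# diffusion over time: degree centralization per time subset for the case lemmas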
df_comp %>%
select(LEMMA, SUBSETTING, SUBSET, CENT_DEGREE, CENT_EV, DENSITY) %>%
filter(
SUBSET != 'full',
LEMMA %in% c(cases)
# LEMMA %in% c(cases, 'hyperlocal', 'blockchain', 'climate denial', 'man bun', 'upskill', 'deep learning')
# overtourism: diff too big for case study scale;
# , 'broflake', 'climate crisis', 'incel', 'overtourism'
) %>%
ggplot(., aes(x=SUBSET, y=CENT_DEGREE)) + # group=1
geom_point(aes(group=LEMMA, color=LEMMA, shape=LEMMA)) +
geom_line(aes(group=LEMMA, color=LEMMA, linetype=LEMMA)) +
# guides(group=TRUE) # no guide is drawn for the group aesthetic, so this has no effect
scale_shape_manual(values=1:8) + # default shape palette handles at most 6 values; the 8 case lemmas need manual shapes
ggtitle('Diffusion over time: changes in degree centralization') +
scale_y_continuous('degree centralization') +
scale_x_discrete('subset')
# ggplotly(plt)
ggsave('out/cases_cent_diac.pdf', width=6, height=4)
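# edge counts and centralization measures for the case lemmas (full networks), least centralized first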
df_comp %>%
filter(
LEMMA %in% cases,
SUBSET == 'full'
) %>%
select(
LEMMA,
EDGES,
CENT_DEGREE,
CENT_EV
) %>%
arrange((CENT_DEGREE))
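# all lemmas in the full data, most used first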
df_comp %>%
filter(SUBSET == 'full') %>%
arrange(desc(USES_TOT))
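# centralization of the full networks, least centralized first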
df_comp %>%
select(LEMMA, SUBSET, USES_TOT, CENT_DEGREE, CENT_EV) %>%
filter(
SUBSET == 'full'
# USES >= 2
) %>%
arrange(
(CENT_DEGREE)
# desc(CENT_EV)
)
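# dot plot: degree centralization per lemma in the full networks (log scale)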
plt <- df_comp %>%
select(LEMMA, SUBSET, USES, CENT_DEGREE) %>%
filter(SUBSET == 'full') %>%
arrange((CENT_DEGREE)) %>%
ggplot(., aes(x=CENT_DEGREE, y=reorder(LEMMA, CENT_DEGREE))) +
geom_point() +
scale_y_discrete('lemmas') +
scale_x_continuous(
'degree centralization (log)',
trans='log'
)
plt
# ggsave('out/cent_sync_all.pdf', width=6, height=4)
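# average density and degree centralization per time subset for the lemmas in unsuccessful_diffs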
df_comp %>%
filter(
SUBSET %in% c('one', 'two', 'three', 'four'),
LEMMA %in% unsuccessful_diffs
# USES_TOT >= 10000
# USES > 1000
) %>%
group_by(SUBSET) %>%
summarize(
DENS_AVG = mean(DENSITY),
CENT_AVG = mean(CENT_DEGREE)
) %>%
ggplot(., aes(x=SUBSET, y=CENT_AVG, group=1)) + # CENT_AVG, to match the 'degree centralization' axis label and output file
geom_line() +
geom_point() +
scale_y_continuous('degree centralization') +
scale_x_discrete('subsets')
# ggsave('out/full_cent_diac.pdf', width=6, height=4)
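# per-lemma change in degree centralization between the first and last time subset (frequent lemmas only)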
df_comp %>%
select(LEMMA, SUBSET, CENT_DEGREE, EDGES, USES_TOT) %>%
filter(
SUBSET %in% c(
'one',
'four'
),
USES_TOT >= 10000
) %>%
dplyr::group_by(LEMMA) %>%
dplyr::mutate(CENT_DIFF = CENT_DEGREE - lag(CENT_DEGREE)) %>% # change from subset 'one' to 'four'; the 'one' row becomes NA and is dropped below
drop_na() %>%
select(-SUBSET) %>%
rename(
CENT_LAST = CENT_DEGREE,
EDGES_LAST = EDGES
) %>%
arrange((CENT_DIFF))
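# density of the last-subset networks, sparsest first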
df_comp %>%
select(LEMMA, SUBSET, USES_TOT, CENT_DEGREE, DENSITY) %>%
filter(SUBSET == 'four') %>%
arrange(DENSITY)
ggplotly(plt)
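# average density per time subset for the frequent lemmas in unsuccessful_diffs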
df_comp %>%
filter(
SUBSET %in% c('one', 'two', 'three', 'four'),
LEMMA %in% unsuccessful_diffs,
USES_TOT >= 10000
) %>%
group_by(SUBSET) %>%
summarize(
DENS_AVG = mean(DENSITY)
) %>%
ggplot(., aes(x=SUBSET, y=DENS_AVG, group=1)) +
geom_line() +
geom_point() +
scale_y_continuous('density') +
scale_x_discrete('subsets')
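# per-lemma change in density between the first and last time subset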
df_comp %>%
select(LEMMA, SUBSET, CENT_DEGREE, DENSITY, EDGES, USES_TOT) %>%
filter(
SUBSET %in% c(
'one',
'four'
)
# USES_TOT >= 10000
) %>%
dplyr::group_by(LEMMA) %>%
dplyr::mutate(DENS_DIFF = DENSITY - lag(DENSITY)) %>% # change from subset 'one' to 'four'; the 'one' row becomes NA and is dropped below
drop_na() %>%
select(-SUBSET) %>%
rename(
DENS_LAST = DENSITY,
EDGES_LAST = EDGES
) %>%
arrange((DENS_DIFF))
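# usage frequency vs. degree centralization in the last time subset, labelled by lemma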
df_comp %>%
filter(
SUBSET == 'four',
# USES_TOT %in% (150000:500000)
# LEMMA %in% c(cases)
# !LEMMA %in% c('slut shaming', 'dashcam', 'shareable', 'cuckold', 'deep learning', 'hyperlocal')
) %>%
select(LEMMA, CENT_DEGREE, USES_TOT, USES, EDGES) %>%
ggplot(., aes(x=CENT_DEGREE, y=USES_TOT)) +
geom_text(aes(label=LEMMA), hjust=-0.1, vjust=-0.1) +
# geom_point() +
scale_y_continuous(
'usage frequency (log)',
trans='log'
) +
scale_x_continuous(
'degree centralization (log)',
trans='log'
)
# ggplotly(plt)
ggsave('out/full_cent_freq_overall.pdf', width=6, height=4)
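# frequency range covered by the case lemmas (excluding 'poppygate' and 'upskill')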
df_cases <- df_comp %>%
filter(
SUBSET == 'full',
LEMMA %in% c(cases),
!LEMMA %in% c('poppygate', 'upskill')
)
cases_freq_min <- df_cases %>%
select(USES_TOT) %>%
arrange(USES_TOT) %>%
slice(1) %>%
pull(USES_TOT)
cases_freq_max <- df_cases %>%
select(USES_TOT) %>%
arrange(desc(USES_TOT)) %>%
slice(1) %>%
pull(USES_TOT)
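# all full networks within the case-lemma frequency range, case lemmas highlighted in blue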
df_comp %>%
filter(
SUBSET == 'full',
USES_TOT >= cases_freq_min,
USES_TOT <= cases_freq_max,
!LEMMA %in% c('big dick energy', 'slut shaming', 'cuckold', 'shareable', 'Brexiteer', 'incel', 'dashcam')
) %>%
select(LEMMA, CENT_DEGREE, USES_TOT, USES, EDGES) %>%
ggplot(., aes(x=CENT_DEGREE, y=USES_TOT)) +
geom_text(aes(label=LEMMA), color='black', hjust=-0.1, vjust=-0.1) +
geom_point() +
geom_text(data=df_cases, aes(label=LEMMA), color='blue', hjust=-0.1, vjust=-0.1) +
# geom_point() +
scale_y_continuous(
'usage frequency (log)',
trans='log'
) +
scale_x_continuous(
'degree centralization'
# trans='log'
)
# ggsave('out/cases_cent_freq_overall.pdf', width=6, height=4)
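# ratio of total uses to degree centralization in the last time subset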
df_comp %>%
filter(
SUBSETTING == 'time',
SUBSET == 'four'
) %>%
select(LEMMA, USES_TOT, CENT_DEGREE) %>%
mutate(DISC = USES_TOT / CENT_DEGREE) %>%
arrange(DISC)
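# drop identifier columns and test the correlation between usage and degree centralization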
df_corr <- df_comp %>%
filter(
# SUBSET != 'full'
# EDGES >= 100
) %>%
select(-c(LEMMA, SUBSET, START, END, SKIP, STAMP))
cor.test(df_corr$USES, df_corr$CENT_DEGREE)
Pearson's product-moment correlation
data: df_corr$USES and df_corr$CENT_DEGREE
t = -2.3698, df = 509, p-value = 0.01817
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.1894877 -0.0178851
sample estimates:
cor
-0.1044639
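# the same test can be repeated for the other network measures; a minimal sketch,
# assuming df_corr still contains CENT_EV and DENSITY alongside CENT_DEGREE:
# for (m in c('CENT_DEGREE', 'CENT_EV', 'DENSITY')) {
#   print(cor.test(df_corr$USES, df_corr[[m]]))
# }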
df_comp %>%
filter(
SUBSET == 'full',
USES_TOT >= 1000
) %>%
select(LEMMA, USES_TOT, COEF_VAR) %>%
arrange(desc(COEF_VAR))
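# variability of usage: lemmas with at least 1000 uses, highest coefficient of variation first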
df_comp %>%
select(LEMMA, SUBSET, STAMP) %>%
filter(SUBSET == 'four') %>%
# mutate(STAMP = as_datetime(STAMP)) %>%
arrange(desc(STAMP))
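# total number of uses and users summed over all lemmas (full data)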
df_comp %>%
filter(SUBSET == 'full') %>%
select(LEMMA, SUBSET, USES_TOT, USERS_TOT) %>%
dplyr::summarise(
USES_ALL = sum(USES_TOT),
USERS_ALL = sum(USERS_TOT)
)